---
title: "Estimating the population AMCE"
author: "Mitsuru Mukaigawara, Tianyu Su, Katie Tucker"
date: "11/27/2020"
output:
  pdf_document: default
  html_document: default
---

```{r setup, include = FALSE}
knitr::opts_chunk$set(message = FALSE, warning = FALSE)
```

```{r, include = FALSE}
# Load packages ---
library(factorEx) #A package by Egami et al to produce pAMCE results
library(readr)
library(dplyr)
library(tidyr)
library(snow)
library(gdata)

# Note: required datasets for this analysis include: Terrorism.RData, Terror_Class39_Obs.csv, and Terror_Class39_Obs_collapsed.csv.
```

# Main data

The purpose of this replication is to analyze if the results by Huff and Kertzer (AJPS, 2018) will change when the marginal distribution of each factor is constructed from the real-world examples. First, the original data produced by Huff and Kertzer is loaded.

```{r}

# Load cleaned data
load("~/Dropbox/2020_Terrorism_Replication/02_data/Terrorism.RData") #Please change the directory appropriately
head(dat2)
```

# Target distribution

To perform this analysis, we need a dataset for target distribution. This is a list of all the marginal probabilities for each factor. In this section, we clean the target distribution dataset.

```{r}

# Convert the probabilities to the list ---
Terror_Class_39_Obs <- read_csv("Terror_Class_39_Obs.csv") #Original data for target distribution
terror_39_transpose <- as.data.frame(t(as.matrix(Terror_Class_39_Obs))) #Transpose
terror_39_transpose <- cbind(rownames(terror_39_transpose), 
                             data.frame(terror_39_transpose, row.names=NULL)) #Rename

terror_39_transpose %>% #Separate factors and levels
  separate("rownames(terror_39_transpose)", c("factor", "levels"), sep=": ") -> terror_39

# Extract probabilities data by factors ---
create_vector <- function(range, data){
  results <- as.numeric(data[range, 3])
  names <- as.vector(data[range, 2])
  names(results) <- names
  return(results)
} #This is a function to create a vector of probabilities for each factor

Tactic <- create_vector(1:4, terror_39) #Probabilities of each level for the factor "tactic"
Casualties <- create_vector(5:8, terror_39) #Same as above
Actor <- create_vector(9:14, terror_39)
Motivation <- create_vector(20:24, terror_39)
Target <- create_vector(25:33, terror_39)
Location <- create_vector(34:38, terror_39)

# Create a target distribution vector for "Identity" separately due to a spelling error
Identity_results <- as.numeric(terror_39[15:19, 3])
Identity_names <- as.vector(terror_39[15:19, 2])
Identity_names[5] <- "Right-Wing" #Spelling error in the original dataset
names(Identity_results) <- Identity_names
Identity <- Identity_results

# Combine numeric vectors to create the list for target distribution ---
target_distribution_39 <- list(Tactic = Tactic,
                               Casualties = Casualties, 
                               Actor = Actor,
                               Ideology = Identity, #Rename the factor Identity as Ideology
                               Motivation = Motivation, 
                               Target = Target, 
                               Location = Location)
```

Then, we clean the original data (dat2) so that each level/factor matches that of the target distribution.

```{r}

# Clean the original data (the data and target distribution use different levels) ---
dat2 %>% 
  select(ID_numeric, TerrorBinary, Tactic, Casualties, Actor, 
         Ideology, Motivation, Target, Location) -> data_analysis #Selecting appropriate columns

# Tactic
data_analysis$Tactic <- as.character(data_analysis$Tactic)
  
data_analysis$Tactic[data_analysis$Tactic == "protest"] <- "Protest"
data_analysis$Tactic[data_analysis$Tactic == "hostage taking"] <- "Hostage Taking"
data_analysis$Tactic[data_analysis$Tactic == "shooting"] <- "Shooting"
data_analysis$Tactic[data_analysis$Tactic == "bombing"] <- "Bombing"

# Casualties
data_analysis$Casualties <- as.character(data_analysis$Casualties)

data_analysis$Casualties[data_analysis$Casualties == "were no individuals"] <- "None"
data_analysis$Casualties[data_analysis$Casualties == "was one individual"] <- "One"
data_analysis$Casualties[data_analysis$Casualties == "were two individuals"] <- "Two"
data_analysis$Casualties[data_analysis$Casualties == "were ten individuals"] <- "Ten"

# Actor
data_analysis$Actor <- as.character(data_analysis$Actor)

data_analysis$Actor[data_analysis$Actor == "group"] <- "Group"
data_analysis$Actor[data_analysis$Actor == "individual"] <- "Individual"
data_analysis$Actor[data_analysis$Actor == "individual with a history of mental illness"] <- 
  "Individual with a history of mental illness"
data_analysis$Actor[data_analysis$Actor == "organization"] <- "Organization"
data_analysis$Actor[data_analysis$Actor == "organization with ties to the United States"] <- 
  "Organization with ties to the USA"
data_analysis$Actor[data_analysis$Actor == "organization with ties to a foreign government"] <- 
  "Organization with ties to a foreign government"

# Ideology
data_analysis$Ideology <- as.character(data_analysis$Ideology)

data_analysis$Ideology[data_analysis$Ideology == "a right-wing"] <- "Right-Wing"
data_analysis$Ideology[data_analysis$Ideology == "a Muslim"] <- "Muslim"
data_analysis$Ideology[data_analysis$Ideology == "a left-wing"] <- "Left-Wing"
data_analysis$Ideology[data_analysis$Ideology == "an"] <- "Unspecified"
data_analysis$Ideology[data_analysis$Ideology == "a Christian"] <- "Christian"

# Motivation
data_analysis$Motivation <- as.character(data_analysis$Motivation)

data_analysis$Motivation[data_analysis$Motivation == 
                           "News reports suggest the incident was motivated by the goal of changing government policy."] <- "Change Policy"
data_analysis$Motivation[data_analysis$Motivation == 
                           "News reports suggest that there was no clear motivation for the incident."] <- "Unclear"
data_analysis$Motivation[data_analysis$Motivation == 
                           "News reports suggest the incident was motivated by hatred towards the target."] <- "Hatred for the target"
data_analysis$Motivation[data_analysis$Motivation == 
                           "News reports suggest the incident was motivated by the goal of overthrowing the government."] <- "Overthrow Government"
data_analysis$Motivation[data_analysis$Motivation == 
                           "News reports suggest the individual had been in an ongoing personal dispute with one of the targets."] <- "Personal Dispute with the target"

# Target
data_analysis$Target <- as.character(data_analysis$Target)

data_analysis$Target[data_analysis$Target == "synagogue"] <- "Synagogue"
data_analysis$Target[data_analysis$Target == "school"] <- "School"
data_analysis$Target[data_analysis$Target == "police station"] <- "Police Station"
data_analysis$Target[data_analysis$Target == "mosque"] <- "Mosque"
data_analysis$Target[data_analysis$Target == "Christian community center"] <- "Christian Center"
data_analysis$Target[data_analysis$Target == "military facility"] <- "Military Facility"
data_analysis$Target[data_analysis$Target == "Jewish community center"] <- "Jewish Center"
data_analysis$Target[data_analysis$Target == "Muslim community center"] <- "Muslim Center"
data_analysis$Target[data_analysis$Target == "church"] <- "Church"

# Location
data_analysis$Location <- as.character(data_analysis$Location)

data_analysis$Location[data_analysis$Location == "a foreign democracy"] <- "Foreign Democracy"
data_analysis$Location[data_analysis$Location == "the United States"] <- "United States"
data_analysis$Location[data_analysis$Location == "a foreign democracy with a history of human rights violations"] <- "Foreign Democracy with a History of human rights violations"
data_analysis$Location[data_analysis$Location == "a foreign dictatorship"] <- "Dictatorship"
data_analysis$Location[data_analysis$Location == "a foreign dictatorship with a history of human rights violations"] <- "Dictatorship with a history of human rights violations"
# Note that in the last line I intentionally spelled "history," not a capital "History"
```

```{r}

data_analysis$Tactic <- as.factor(data_analysis$Tactic)
data_analysis$Casualties <- as.factor(data_analysis$Casualties)
data_analysis$Actor <- as.factor(data_analysis$Actor)
data_analysis$Ideology <- as.factor(data_analysis$Ideology)
data_analysis$Motivation <- as.factor(data_analysis$Motivation)
data_analysis$Target <- as.factor(data_analysis$Target)
data_analysis$Location <- as.factor(data_analysis$Location)

# Relevel the data
data_analysis$Tactic = relevel(data_analysis$Tactic, ref = "Shooting") #Different from the original baseline
data_analysis$Casualties = relevel(data_analysis$Casualties, ref = "None")
data_analysis$Actor = relevel(data_analysis$Actor, ref = "Individual")
data_analysis$Ideology = relevel(data_analysis$Ideology, ref = "Unspecified")
data_analysis$Motivation = relevel(data_analysis$Motivation, ref = "Change Policy") #Different from the original baseline
data_analysis$Target = relevel(data_analysis$Target, ref = "Military Facility") 
data_analysis$Location = relevel(data_analysis$Location, ref = "Foreign Democracy") #Different from the original baseline
```


# pAMCE (model-based)

We then construct population AMCE model using the target distribution that was produced above and the original data. To do this, we use model_pAMCE function defined by Egami et al (https://github.com/naoki-egami/factorEx).

For this paper, we focus on two levels "Casualties" and "Actor" because the same baseline levels are only acceptable for these factors when using model_pAMCE function (We are still investigating why this is the case). We first construct a new model using a different target distribution, and then extract the data from the original analysis.

```{r}

### Please note that running model_pAMCE function takes approximately 1-2 minutes ###

out_model <- 
  model_pAMCE(formula = TerrorBinary ~ Tactic + Casualties + Actor + Ideology + 
                Motivation + Target + Location,
              data = data_analysis, 
              reg = TRUE,
              target_dist = target_distribution_39, target_type = "marginal")
```

```{r}

# Extract the original results (copied from the replication code by the authors) ---

# Converting survey weights to numeric 
dat2$webal2 <- as.numeric(dat2$webal2)

# Do parallel processing - speeds things up
cl <- makeCluster(8,"SOCK")

# Function to cluster bootstrap standard errors -- will be used for all analyses
clusterBootS2 <- function(dat2){
  i <- sample(unique(dat2$ID_numeric),length(unique(dat2$ID_numeric)),replace=TRUE)
  row.nums <- NULL
  for (j in 1:length(i)){
    row.nums <- c(row.nums, which(dat2$ID_numeric==i[j]))
  }
  return(dat2[row.nums,])
}

# Function to run model and then cluster standard errors
bootConjoint <- function(...){
  temp <- clusterBootS2(dat2)
  mod.temp <- lm(TerrorBinary ~  Tactic + Casualties + Actor + Ideology + Motivation + Target + Location, weights=webal2, data=temp)
  return(coef(mod.temp))
}

clusterExport(cl, list("dat2", "bootConjoint", "clusterBootS2"))

system.time(boot.full2 <- parSapply(cl, rep(1,1500), bootConjoint)) 

# Generating a Matrix where each row is a treatment category, the first column is the coefficient estimate, and the second and fourth columns give the 95% confidence intervals, while the third and fifth give the 90% 
plot.mat2 <- cbind(apply(boot.full2, 1, mean), apply(boot.full2, 1, quantile, c(0.025)), apply(boot.full2, 1, quantile, c(0.05)), apply(boot.full2, 1, quantile, c(0.95)), apply(boot.full2,1,quantile, c(0.975)))[-1,]

# Now add baseline categories: these will be zeroes on the plot which will allow us to make easy comparisons for the reader
plot.mat2b <- rbind(rep(0,5), plot.mat2[1:3,], rep(0,5), plot.mat2[4:6,], rep(0,5), plot.mat2[7:11,], rep(0,5), plot.mat2[12:15,], rep(0,5), 
                    plot.mat2[16:19,], rep(0,5), plot.mat2[c(20:27),], rep(0,5), plot.mat2[28:31,])
```

```{r}

# Casualties plot ---
textLabFigure1 <- c("No Casualties", "One Casualty", "Two Casualties", "Ten Casualties")

# Data for new AMCEs (factor: casualties)
Casualties_newdata <- out_model$AMCE$Casualties %>% filter(type == "target_1")

# Plot
pdf("fig1.pdf", height=5, width=5.5)
par(oma=c(0,1,0,0), mar=c(3,0,1,0)) 
plot(plot.mat2b[1:(length(textLabFigure1) - 1),1], (length(textLabFigure1) - 1):1, type="n", axes=FALSE, xlab="Average marginal component effect (AMCE)", ylab="",xlim=c(-0.6,0.6))
points(plot.mat2b[6,1], 3, pch=16) # Point estimate, original AMCE, one casualty
points(plot.mat2b[7,1], 2, pch=16) # Two casulties
points(plot.mat2b[8,1], 1, pch=16) # Ten casualties
points(Casualties_newdata["8", "estimate"], 3, pch=16, col="red") # Point estimate, new AMCE, one casualty
points(Casualties_newdata["10", "estimate"], 2, pch=16, col="red") # Two casualties
points(Casualties_newdata["12", "estimate"], 1, pch=16, col="red") # Ten casualties
lines(plot.mat2b[6,c(3,4)], c(3,3), lwd=2) # CI, original AMCE, One casualty
lines(plot.mat2b[7,c(3,4)], c(2,2), lwd=2) # Two casualties
lines(plot.mat2b[8,c(3,4)], c(1,1), lwd=2) # Ten casualties
lines(Casualties_newdata["8",c(6,7)], c(3,3), lwd=2, col="red") # CI, new AMCE, One casualty
lines(Casualties_newdata["10",c(6,7)], c(2,2), lwd=2, col="red") # Two casualties
lines(Casualties_newdata["12",c(6,7)], c(1,1), lwd=2, col="red") # Ten casualties
text(-.5, 3, "One Casualty", cex=0.9)
text(-.5, 2, "Two Casualties", cex=0.9)
text(-.5, 1, "Ten Casualties", cex=0.9)
text(.5, 3, "Black = original \nRed = pAMCE", cex=0.9)
abline(v=0)
axis(1)
dev.off()
```

```{r}
# Actors Plot
Actors_newdata <- out_model$AMCE$Actor %>% filter(type == "target_1")

textActor <- c("Individual", "Individual Mental", "Group", "Organization", "Org US Ties", "Org Foreign Ties")

pdf("fig2.pdf", height=5, width=5.5)
par(oma=c(0,1,0,0), mar=c(4,0,1,0))
plot(plot.mat2b[1:(length(textActor) - 1),1], (length(textActor) - 1):1, type="n", axes=FALSE, xlab="Average marginal component effect (AMCE)", ylab="",xlim=c(-0.6,0.6))
points(plot.mat2b[10,1], 5, pch=16) # Point est, original, Individual Mental
points(plot.mat2b[11,1], 4, pch=16) # Group
points(plot.mat2b[12,1], 3, pch=16) # Organization
points(plot.mat2b[13,1], 2, pch=16) # Org US Ties
points(plot.mat2b[14,1], 1, pch=16) # Org Foreign Ties
points(Actors_newdata["16", "estimate"], 5, pch=16, col="red") # Point estimate, new AMCE, Indiv Mental
points(Actors_newdata["14", "estimate"], 4, pch=16, col="red") # Group
points(Actors_newdata["18", "estimate"], 3, pch=16, col="red") # Organization
points(Actors_newdata["22", "estimate"], 2, pch=16, col="red") # Org US Ties
points(Actors_newdata["20", "estimate"], 1, pch=16, col="red") # Org Foreign Ties
lines(plot.mat2b[10,c(3,4)], c(5,5), lwd=2) # CI, original, Individual Mental
lines(plot.mat2b[11,c(3,4)], c(4,4), lwd=2) # Group
lines(plot.mat2b[12,c(3,4)], c(3,3), lwd=2) # Organization
lines(plot.mat2b[13,c(3,4)], c(2,2), lwd=2) # Org US Ties
lines(plot.mat2b[14,c(3,4)], c(1,1), lwd=2) # Org Foreign Ties 
lines(Actors_newdata["16",c(6,7)], c(5,5), lwd=2, col="red") # CI, new AMCE, Indiv Mental
lines(Actors_newdata["14",c(6,7)], c(4,4), lwd=2, col="red") # Group
lines(Actors_newdata["18",c(6,7)], c(3,3), lwd=2, col="red") # Organization
lines(Actors_newdata["22",c(6,7)], c(2,2), lwd=2, col="red") # Org US Ties
lines(Actors_newdata["20",c(6,7)], c(1,1), lwd=2, col="red") # Org Foreign Ties
text(-.5, 5, "Individual Mental", cex=0.9)
text(-.5, 4, "Group", cex=0.9)
text(-.5, 3, "Organization", cex=0.9)
text(-.5, 2, "Org US Ties", cex=0.9)
text(-.5, 1, "Org Foreign Ties", cex=0.9)
text(.5, 5, "Black = original \nRed = pAMCE", cex=0.9)
abline(v=0)
axis(1)
dev.off()
```

### Please note that the following lines are not incorporated to the main paper. The following lines are only for showing that the collapsed levels do not allow us to use the original baseline levels. The main figures (Figure 1 and 2) in the text are produced in the previous two R chunks. ###

# Collapsing some levels

Another thing we could do is to collapse some levels in several factors that have no observations. These null observations prohibited us from setting the same level as the original model. If we can collapse the null levels, we could potentially compare the sample AMCE and pAMCE using model_pAMCE code.

```{r}
# Construct target distribution without null levels (Christianity and attacks on christian centers)
Terror_Class_39_Obs_collapsed <- read_csv("Terror_Class_39_Obs_collapsed.csv") #Original data for target distribution
terror_39_transpose_collapsed <- as.data.frame(t(as.matrix(Terror_Class_39_Obs_collapsed))) #Transpose
terror_39_transpose_collapsed <- cbind(rownames(terror_39_transpose_collapsed), 
                             data.frame(terror_39_transpose_collapsed, row.names=NULL)) #Rename

terror_39_transpose_collapsed %>% #Separate factors and levels
  separate("rownames(terror_39_transpose_collapsed)", c("factor", "levels"), sep=": ") -> terror_39_collapsed

# Extract probabilities data by factors ---
create_vector <- function(range, data){
  results <- as.numeric(data[range, 3])
  names <- as.vector(data[range, 2])
  names(results) <- names
  return(results)
} #This is a function to create a vector of probabilities for each factor

Tactic <- create_vector(1:4, terror_39_collapsed) #Probabilities of each level for the factor "tactic"
Casualties <- create_vector(5:8, terror_39_collapsed) #Same as above
Actor <- create_vector(9:14, terror_39_collapsed)
Motivation <- create_vector(19:23, terror_39_collapsed)
Target <- create_vector(24:31, terror_39_collapsed)
Location <- create_vector(32:36, terror_39_collapsed)

# Create a target distribution vector for "Identity" separately due to a spelling error
Identity_results <- as.numeric(terror_39_collapsed[15:18, 3])
Identity_names <- as.vector(terror_39_collapsed[15:18, 2])
Identity_names[4] <- "Right-Wing" #Spelling error in the original dataset
names(Identity_results) <- Identity_names
Identity <- Identity_results

# Combine numeric vectors to create the list for target distribution ---
target_distribution_39_collapsed <- list(Tactic = Tactic,
                               Casualties = Casualties, 
                               Actor = Actor,
                               Ideology = Identity, #Rename the factor Identity as Ideology
                               Motivation = Motivation, 
                               Target = Target, 
                               Location = Location)

```

```{r}
# Collapse two pairs of levels in the observed data ---
data_analysis_collapsed <- data_analysis

# Combine Christian and Muslim as one religious category
data_analysis_collapsed$Ideology[data_analysis_collapsed$Ideology == "Christian"] <- "Muslim"

# Combine Christian center and Church as one religous place
data_analysis_collapsed$Target[data_analysis_collapsed$Target == "Christian Center"] <- "Church"

# Drop unused levels
data_analysis_collapsed <- drop.levels(data_analysis_collapsed)

```

However, as we can see below, we were unable to use the original baseline levels. The original levels were not suitable for analysis because there were some null "interactions" (not levels).

```{r}
# Relevel the data
data_analysis_collapsed$Casualties = relevel(data_analysis_collapsed$Casualties, ref = "None")
data_analysis_collapsed$Actor = relevel(data_analysis_collapsed$Actor, ref = "Individual")
data_analysis_collapsed$Ideology = relevel(data_analysis_collapsed$Ideology, ref = "Unspecified")
data_analysis_collapsed$Target = relevel(data_analysis_collapsed$Target, ref = "Military Facility") 
data_analysis_collapsed$Tactic = relevel(data_analysis_collapsed$Tactic, ref = "Shooting") #Different from the original baseline
data_analysis_collapsed$Motivation = relevel(data_analysis_collapsed$Motivation, ref = "Change Policy") #Different from the original baseline
data_analysis_collapsed$Location = relevel(data_analysis_collapsed$Location, ref = "Foreign Democracy") #Different from the original baseline

# Run the model
out_model_collapsed <- 
  model_pAMCE(formula = TerrorBinary ~ Tactic + Casualties + Actor + Ideology + 
                Motivation + Target + Location,
              data = data_analysis_collapsed, 
              reg = TRUE,
              target_dist = target_distribution_39_collapsed, target_type = "marginal")
```